import asyncio
import uuid
from typing import Dict, Any, Optional
from datetime import datetime

class QueueFullError(Exception):
    """Signal that the request queue has no free slot for another request."""

class RequestQueueManager:
    """
    Serialize inference requests through a single background worker.

    Requests are placed on a bounded FIFO ``asyncio.Queue``. One worker task
    takes them off one at a time and awaits ``processor_func`` for each, so at
    most one inference runs at any moment. Every request is paired with a
    future that is always resolved: with the processor's result, with the
    exception it raised, with ``TimeoutError`` if the request waited in the
    queue for too long, or by cancellation when the manager shuts down.
    """

    def __init__(self, max_queue_size: int = 10, request_timeout: int = 300):
        """
        Args:
            max_queue_size: Capacity handed to ``asyncio.Queue(maxsize=...)``.
                Per ``asyncio.Queue`` semantics, a value <= 0 means unbounded.
            request_timeout: Maximum number of seconds a request may wait in
                the queue. Only the waiting time is checked; the time spent
                inside ``processor_func`` is not limited.
        """
        self._queue = asyncio.Queue(maxsize=max_queue_size)
        self._processing = False
        self._current_request_id = None
        self._request_timeout = request_timeout
        # Requests that are queued or in flight, keyed by request id.
        self._active_requests: Dict[str, Any] = {}
        self._worker_task: Optional[asyncio.Task] = None

    async def start_worker(self, processor_func):
        """Start the background worker unless one is already running.

        Args:
            processor_func: Async callable awaited as
                ``processor_func(request_data)`` for each request; its return
                value becomes the result of that request's future.
        """
        if self._worker_task is None or self._worker_task.done():
            self._worker_task = asyncio.create_task(self._worker(processor_func))

    async def _worker(self, processor_func):
        """Consume queued requests one at a time, in FIFO order, until cancelled."""
        while True:
            try:
                request_item = await self._queue.get()
                request_id = request_item["id"]
                self._processing = True
                self._current_request_id = request_id
                try:
                    await self._process_request(request_item, processor_func)
                finally:
                    # Runs on success, failure and cancellation alike so the
                    # bookkeeping never goes stale.
                    self._active_requests.pop(request_id, None)
                    self._processing = False
                    self._current_request_id = None
                    self._queue.task_done()
            except asyncio.CancelledError:
                # Propagate so the task is really marked as cancelled.
                raise
            except Exception as e:
                print(f"Queue worker error: {e}")
                await asyncio.sleep(1)  # Prevent tight error loop

    async def _process_request(self, request_item, processor_func):
        """Resolve one request's future with a result, an error, or cancellation."""
        request_id = request_item["id"]
        future = request_item["future"]

        if future.done():
            # The caller already gave up (e.g. cancelled the future); do not
            # spend an inference on a result nobody will read.
            return

        # Event-loop time is monotonic, so wall-clock adjustments cannot
        # distort the measured waiting time.
        wait_time = asyncio.get_running_loop().time() - request_item["enqueued_at"]
        if wait_time > self._request_timeout:
            future.set_exception(
                TimeoutError(
                    f"Request {request_id} timed out after {wait_time:.1f}s"
                )
            )
            return

        try:
            result = await processor_func(request_item["data"])
        except asyncio.CancelledError:
            # Worker is being shut down mid-request: release the waiting
            # caller instead of leaving its future pending forever.
            future.cancel()
            raise
        except Exception as e:
            # The caller may have cancelled the future while we were busy.
            if not future.done():
                future.set_exception(e)
        else:
            if not future.done():
                future.set_result(result)

    async def add_request(self, request_data: Dict[str, Any]):
        """Enqueue a request without waiting for a free slot.

        Args:
            request_data: Payload later passed unchanged to the processor.

        Returns:
            A ``(request_id, future)`` tuple. ``request_id`` is a UUID4 string;
            awaiting ``future`` yields the processor's result or raises the
            error recorded for this request.

        Raises:
            QueueFullError: If the queue is already at capacity.
        """
        request_id = str(uuid.uuid4())
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        request_item = {
            "id": request_id,
            "data": request_data,
            "timestamp": datetime.now(),   # wall-clock, informational only
            "enqueued_at": loop.time(),    # monotonic, used for the timeout
            "future": future,
        }
        try:
            self._queue.put_nowait(request_item)  # fail fast if full
        except asyncio.QueueFull as exc:
            raise QueueFullError("Queue is full") from exc

        self._active_requests[request_id] = request_item
        return request_id, future

    def get_queue_status(self) -> Dict[str, Any]:
        """Return a snapshot of queue depth, capacity and worker activity."""
        return {
            "queue_size": self._queue.qsize(),
            "max_size": self._queue.maxsize,
            "is_processing": self._processing,
            "current_request_id": self._current_request_id,
            "active_requests": len(self._active_requests),
        }

    def _cancel_queued_requests(self) -> None:
        """Cancel the future of every request still waiting in the queue."""
        while True:
            try:
                request_item = self._queue.get_nowait()
            except asyncio.QueueEmpty:
                return
            request_item["future"].cancel()  # no-op if already done
            self._active_requests.pop(request_item["id"], None)
            self._queue.task_done()

    async def shutdown(self) -> None:
        """Stop the worker and cancel every request that has not completed.

        The in-flight request (if any) and all requests still queued have
        their futures cancelled, so callers awaiting them are released with
        ``asyncio.CancelledError`` rather than waiting forever.
        """
        if self._worker_task and not self._worker_task.done():
            self._worker_task.cancel()
            try:
                await self._worker_task
            except asyncio.CancelledError:
                pass
        self._cancel_queued_requests()

# Shared module-level manager, created at import time with the constructor's
# default limits.
request_queue: RequestQueueManager = RequestQueueManager()